1 Introduction

The purpose of this notebook is to annotate the cell types of the two new T cell culturing & activation experiments (replicate 2). It will follow the same steps as replicate 1, so we refer to the notebook “03-annotation.Rmd” for a full explanation of each step.

2 Pre-processing

2.1 Package loading

library(scater)
library(scran)
library(Seurat)
library(ggpubr)
library(kBET)
library(cluster)
library(AnnotationDbi)
library(org.Hs.eg.db)
library(viridis)
library(tidyverse)

2.2 Source script with functions

source("bin/utils.R")

2.3 Load data

t_act_l <- readRDS("results/R_objects/t_act_Seurat_list_filtered_normalized.rds")

Let us filter the previous list to retain only replicate 2:

t_act_l <- t_act_l[c("day_0_rep2", "day_1_rep2")]

3 Find Variable Genes

t_act_0_rep2 <- SplitObject(t_act_l$day_0_rep2, split.by = "donor")
t_act_1_rep2 <- SplitObject(t_act_l$day_1_rep2, split.by = "donor")
t_act_l <- list(
  t_act_0_rep2$Female2, 
  t_act_0_rep2$Female3, 
  t_act_1_rep2$Female2, 
  t_act_1_rep2$Female3
)
names(t_act_l) <- c("0_rep2_F2", "0_rep2_F3", "1_rep2_F2", "1_rep2_F3")
t_act_l <- purrr::map(t_act_l, FindVariableFeatures)
var_plots <- purrr::map(t_act_l, VariableFeaturePlot)
var_plots <- purrr::map2(t_act_l, var_plots, function(seurat, p) {
  LabelPoints(
  plot = p, 
  points =  head(VariableFeatures(seurat), 10), 
  repel = TRUE
  )
})
var_plots
## $`0_rep2_F2`

## 
## $`0_rep2_F3`

## 
## $`1_rep2_F2`

## 
## $`1_rep2_F3`

Again, in the activated cells (day 1), we see that interferon gamma is the most variable gene.

4 Scale data

t_act_l <- purrr::map(t_act_l, ScaleData)

5 Linear dimensionality reduction (PCA)

t_act_l <- purrr::map(t_act_l, RunPCA)
purrr::map(t_act_l, VizDimLoadings, dims = 1:3, reduction = "pca")
## $`0_rep2_F2`

## 
## $`0_rep2_F3`

## 
## $`1_rep2_F2`

## 
## $`1_rep2_F3`

purrr::map(t_act_l, DimPlot, reduction = "pca")
## $`0_rep2_F2`

## 
## $`0_rep2_F3`

## 
## $`1_rep2_F2`

## 
## $`1_rep2_F3`

# Cluster cells

t_act_l <- purrr::map(t_act_l, FindNeighbors, dims = 1:20)
# resolutions <- c(
#   0.005,
#   seq(0.01, 0.1, by = 0.01),
#   seq(0.1, 1, by = 0.1), seq(1, 10, by = 1)
# )
# avgs_sil_width_dfs <- purrr::map2(t_act_l, names(t_act_l), function(seurat, condition) {
#   print(condition)
#   avgs_sil_width_df <- data.frame(num_k = c(), sil_width = c(), resolution = c())
#   num_k <- 1
#   for (res in resolutions) {
#     print(str_c("Current resolution is ", res))
#     seurat <-  FindClusters(seurat, resolution = res, verbose = FALSE)
#     curr_num_k <- length(levels(seurat$seurat_clusters))
#     if (curr_num_k == num_k) {
#       next
#     } else {
#       sil_width <- calc_sil_width(
#         object = seurat, 
#         clusters = seurat$seurat_clusters, 
#         npcs = 5, 
#         ncell = 2500
#       )
#       print(str_c("Current silhouette width is ", sil_width))
#       curr_df <- data.frame(num_k = curr_num_k, sil_width = sil_width, resolution = res)
#       avgs_sil_width_df <- rbind(avgs_sil_width_df, curr_df)
#       num_k <- curr_num_k 
#       print(str_c("Current number of clusters is ", num_k))
#     }
#   }
#   avgs_sil_width_df
# })
# resolution_plots <- purrr::map2(avgs_sil_width_dfs, names(avgs_sil_width_dfs), function(df, cond) {
#   ggplot(df, aes(num_k, sil_width, label = num_k)) +
#     geom_text() +
#     labs(title = cond,
#          x = "Number of clusters (k)", 
#          y = "Average Silhouette Width") +
#     theme_classic() +
#     theme(plot.title = element_text(hjust = 0.5))
# })
# resolution_plots

We aim to determine a number of clusters that maximizes silhouette width while preventing overstratification. Judgding by the previous plots, we define the following k:

  • 0_rep2_F2: 5
  • 0_rep2_F3: 4
  • 1_rep2_F2: 5
  • 1_rep2_F3: 5
# optimal_k <- c(5, 4, 5, 5)
# t_act_l <- purrr::pmap(list(t_act_l, avgs_sil_width_dfs, optimal_k), function(seurat, df, opt_k) {
#   opt_resolution <- df[df$num_k == opt_k, "resolution"]
#   seurat <- FindClusters(object = seurat, resolution = opt_resolution)
#   seurat
# })
# purrr::map(t_act_l, ~ length(levels(.x$seurat_clusters)))
resolutions <- c(0.1, 0.1, 0.3, 0.15)
t_act_l <- purrr::map2(t_act_l, resolutions, function(seurat, res) {
  seurat <- FindClusters(seurat, resolution = res)
  seurat
})
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 4753
## Number of edges: 195957
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9702
## Number of communities: 7
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 4153
## Number of edges: 168564
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9654
## Number of communities: 6
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 3963
## Number of edges: 146453
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8835
## Number of communities: 6
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 4951
## Number of edges: 181035
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9257
## Number of communities: 5
## Elapsed time: 0 seconds

6 Non-linear dimensionality reduction

t_act_l <- purrr::map(t_act_l, function(seurat) {
  seurat@reductions$PCA <- NULL
  seurat@reductions$TSNE <- NULL
  seurat@reductions$UMAP <- NULL
  seurat
})
t_act_l <- purrr::map(t_act_l, RunTSNE, reduction = "pca", dims = 1:20)
t_act_l <- purrr::map(t_act_l, RunUMAP, reduction = "pca", dims = 1:20)
purrr::map(t_act_l, DimPlot, reduction = "tsne")
## $`0_rep2_F2`

## 
## $`0_rep2_F3`

## 
## $`1_rep2_F2`

## 
## $`1_rep2_F3`

purrr::map(t_act_l, DimPlot, reduction = "umap")
## $`0_rep2_F2`

## 
## $`0_rep2_F3`

## 
## $`1_rep2_F2`

## 
## $`1_rep2_F3`

7 Find cluster markers

markers_l <- purrr::map(t_act_l, FindAllMarkers, only.pos = TRUE)
purrr::map(markers_l, DT::datatable)
## $`0_rep2_F2`
## 
## $`0_rep2_F3`
## 
## $`1_rep2_F2`
## 
## $`1_rep2_F3`
DT::datatable(markers_l$`0_rep2_F2`)
DT::datatable(markers_l$`0_rep2_F3`)
DT::datatable(markers_l$`1_rep2_F2`)
DT::datatable(markers_l$`1_rep2_F3`)
# saveRDS(markers_l, "results/R_objects/t_act_markers_list_rep2.rds")
marker_selection <- purrr::map2(t_act_l, markers_l, function(seurat, df) {
  num_k <- length(unique(seurat$seurat_clusters))
  selection <- unlist(purrr::map(0:(num_k-1), ~df[df$cluster == .x, "gene"][0:8]))
  selection
})
heatmaps_markers <- purrr::map2(t_act_l, marker_selection, ~DoHeatmap(.x, features = .y) + NoLegend())
heatmaps_markers
## $`0_rep2_F2`

## 
## $`0_rep2_F3`

## 
## $`1_rep2_F2`

## 
## $`1_rep2_F3`

8 Cell cycle scoring

t_act_l <- purrr::map(t_act_l, function(seurat) {
  s_genes <- cc.genes$s.genes[cc.genes$s.genes %in% rownames(seurat@assays$RNA@scale.data)]
  g2m_genes <- cc.genes$g2m.genes[cc.genes$g2m.genes %in% rownames(seurat@assays$RNA@scale.data)]
  seurat <- CellCycleScoring(object = seurat, s.features = s_genes, g2m.features = g2m_genes)
  seurat
})
cycling_gg <- purrr::map(t_act_l, FeaturePlot, features = c("S.Score", "G2M.Score"), cols = viridis(20))
cycling_gg
## $`0_rep2_F2`

## 
## $`0_rep2_F3`

## 
## $`1_rep2_F2`

## 
## $`1_rep2_F3`

9 Annotation

9.1 0_rep2_F2

Cluster ID Cell type
0 CD4 T-cell
1 NK
2 Monocyte
3 B-cell
4 CD8 T-cell
5 FCGR3A Monocyte
6 Unknown
# CD4+ T cells show high expression of IL7R.
# CD8+ T cells show high expression of both NKG7 and CD3D.
# NK cells show high expression of both NKG7 and GNLY.
FeaturePlot(t_act_l$`0_rep2_F2`, features = c("IL7R", "CD3D", "NKG7", "GNLY"))

t_act_l$`0_rep2_F2`$cell_type <- case_when(
  t_act_l$`0_rep2_F2`$seurat_clusters == "0" ~ "CD4 T-cell",
  t_act_l$`0_rep2_F2`$seurat_clusters == "1" ~ "NK",
  t_act_l$`0_rep2_F2`$seurat_clusters == "2" ~ "Monocyte",
  t_act_l$`0_rep2_F2`$seurat_clusters == "3" ~ "B-cell",
  t_act_l$`0_rep2_F2`$seurat_clusters == "4" ~ "CD8 T-cell",
  t_act_l$`0_rep2_F2`$seurat_clusters == "5" ~ "FCGR3A Monocyte",
  t_act_l$`0_rep2_F2`$seurat_clusters == "6" ~ "Unknown"
)
Idents(t_act_l$`0_rep2_F2`) <- "cell_type"
DimPlot(t_act_l$`0_rep2_F2`, reduction = "umap", label = TRUE) + NoLegend()

9.2 0_rep2_F3

Cluster ID Cell type
0 CD4 T-cell
1 Monocyte
2 CD8 T-cell
3 NK
4 B-cell
5 FCGR3A Monocyte
FeaturePlot(t_act_l$`0_rep2_F3`, features = c("IL7R", "CD3D", "NKG7", "GNLY"))

t_act_l$`0_rep2_F3`$cell_type <- case_when(
  t_act_l$`0_rep2_F3`$seurat_clusters == "0" ~ "CD4 T-cell",
  t_act_l$`0_rep2_F3`$seurat_clusters == "1" ~ "Monocyte",
  t_act_l$`0_rep2_F3`$seurat_clusters == "2" ~ "CD8 T-cell",
  t_act_l$`0_rep2_F3`$seurat_clusters == "3" ~ "NK",
  t_act_l$`0_rep2_F3`$seurat_clusters == "4" ~ "B-cell",
  t_act_l$`0_rep2_F3`$seurat_clusters == "5" ~ "FCGR3A Monocyte"
)
Idents(t_act_l$`0_rep2_F3`) <- "cell_type"
DimPlot(t_act_l$`0_rep2_F3`, reduction = "umap", label = TRUE) + NoLegend()

9.3 1_rep2_F2

Cluster ID Cell type
0 Cycling CD4 T-cell
1 NK
2 Activated CD4 T-cell
3 B-cell
4 CD8 T-cell
5 Unknown
FeaturePlot(t_act_l$`1_rep2_F2`, features = c("IL7R", "CD3D", "NKG7", "GNLY"))

t_act_l$`1_rep2_F2`$cell_type <- case_when(
  t_act_l$`1_rep2_F2`$seurat_clusters == "0" ~ "Cycling CD4 T-cell",
  t_act_l$`1_rep2_F2`$seurat_clusters == "1" ~ "NK",
  t_act_l$`1_rep2_F2`$seurat_clusters == "2" ~ "Activated CD4 T-cell",
  t_act_l$`1_rep2_F2`$seurat_clusters == "3" ~ "B-cell",
  t_act_l$`1_rep2_F2`$seurat_clusters == "4" ~ "CD8 T-cell",
  t_act_l$`1_rep2_F2`$seurat_clusters == "5" ~ "Unknown"
)
Idents(t_act_l$`1_rep2_F2`) <- "cell_type"
DimPlot(t_act_l$`1_rep2_F2`, reduction = "umap", label = TRUE) + NoLegend()

9.4 1_rep2_F3

Cluster ID Cell type
0 CD4 T-cell
1 NK
2 CD8 T-cell
3 B-cell
4 Unknown
FeaturePlot(t_act_l$`1_rep2_F3`, features = c("IL7R", "CD3D", "NKG7", "GNLY"))

t_act_l$`1_rep2_F3`$cell_type <- case_when(
  t_act_l$`1_rep2_F3`$seurat_clusters == "0" ~ "CD4 T-cell",
  t_act_l$`1_rep2_F3`$seurat_clusters == "1" ~ "NK",
  t_act_l$`1_rep2_F3`$seurat_clusters == "2" ~ "CD8 T-cell",
  t_act_l$`1_rep2_F3`$seurat_clusters == "3" ~ "B-cell",
  t_act_l$`1_rep2_F3`$seurat_clusters == "4" ~ "Unknown"
)
Idents(t_act_l$`1_rep2_F3`) <- "cell_type"

# CD4 T-cell can be subdivided into cycling and activated (looking at the S score and previous donor)
seurat_0 <- t_act_l$`1_rep2_F3`
seurat_0 <- subset(seurat_0, idents = "CD4 T-cell")
seurat_0 <- pre_process_seurat(seurat_0)
seurat_0 <- FindNeighbors(seurat_0)
seurat_0 <- FindClusters(seurat_0, resolution = 0.15)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 3218
## Number of edges: 103122
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8649
## Number of communities: 2
## Elapsed time: 0 seconds
DimPlot(seurat_0)

markers_0 <- FindAllMarkers(seurat_0, only.pos = TRUE)
FeaturePlot(seurat_0, features = c("S.Score", "POLR2H", "POLR3K"))

cells_0_0 <- colnames(seurat_0)[seurat_0$seurat_clusters == "0"]
cells_0_1 <- colnames(seurat_0)[seurat_0$seurat_clusters == "1"]
t_act_l$`1_rep2_F3`$cell_type[colnames(t_act_l$`1_rep2_F3`) %in% cells_0_0] <- "Cycling CD4 T-cell"
t_act_l$`1_rep2_F3`$cell_type[colnames(t_act_l$`1_rep2_F3`) %in% cells_0_1] <- "Activated CD4 T-cell"

# Plot
DimPlot(t_act_l$`1_rep2_F3`, reduction = "umap", label = TRUE) + NoLegend()

10 Save

# saveRDS(t_act_l, file = "results/R_objects/t_act_Seurat_list_reannotated_rep2.rds")

11 Session Info

sessionInfo()
## R version 3.6.1 (2019-07-05)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS High Sierra 10.13.6
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] forcats_0.5.0               stringr_1.4.0               dplyr_0.8.4                 purrr_0.3.3                 readr_1.3.1                 tidyr_1.0.2                 tibble_2.1.3                tidyverse_1.3.0             viridis_0.5.1               viridisLite_0.3.0           org.Hs.eg.db_3.10.0         AnnotationDbi_1.48.0        cluster_2.1.0               kBET_0.99.6                 ggpubr_0.2.5                magrittr_1.5                Seurat_3.1.4                scran_1.14.6                scater_1.14.6               ggplot2_3.3.0               SingleCellExperiment_1.8.0  SummarizedExperiment_1.16.1 DelayedArray_0.12.2         BiocParallel_1.20.1         matrixStats_0.55.0          Biobase_2.46.0              GenomicRanges_1.38.0        GenomeInfoDb_1.22.0         IRanges_2.20.2              S4Vectors_0.24.3            BiocGenerics_0.32.0         BiocStyle_2.14.4           
## 
## loaded via a namespace (and not attached):
##   [1] reticulate_1.14          tidyselect_1.0.0         RSQLite_2.2.0            htmlwidgets_1.5.1        grid_3.6.1               Rtsne_0.15               munsell_0.5.0            codetools_0.2-16         mutoss_0.1-12            ica_1.0-2                DT_0.12                  statmod_1.4.34           future_1.16.0            withr_2.1.2              colorspace_1.4-1         knitr_1.28               rstudioapi_0.11          ROCR_1.0-7               ggsignif_0.6.0           gbRd_0.4-11              listenv_0.8.0            labeling_0.3             Rdpack_0.11-1            GenomeInfoDbData_1.2.2   mnormt_1.5-6             farver_2.0.3             bit64_0.9-7              vctrs_0.2.3              generics_0.0.2           TH.data_1.0-10           xfun_0.12                R6_2.4.1                 ggbeeswarm_0.6.0         rsvd_1.0.3               locfit_1.5-9.1           bitops_1.0-6             assertthat_0.2.1         promises_1.1.0           scales_1.1.0             multcomp_1.4-12          beeswarm_0.2.3           gtable_0.3.0             npsurv_0.4-0             globals_0.12.5           sandwich_2.5-1           rlang_0.4.5              splines_3.6.1           
##  [48] lazyeval_0.2.2           broom_0.5.5              BiocManager_1.30.10      yaml_2.2.1               reshape2_1.4.3           modelr_0.1.6             crosstalk_1.0.0          backports_1.1.5          httpuv_1.5.2             tools_3.6.1              bookdown_0.18            gplots_3.0.3             RColorBrewer_1.1-2       ggridges_0.5.2           TFisher_0.2.0            Rcpp_1.0.3               plyr_1.8.6               zlibbioc_1.32.0          RCurl_1.98-1.1           pbapply_1.4-2            cowplot_1.0.0            zoo_1.8-7                haven_2.2.0              ggrepel_0.8.1            fs_1.3.2                 RSpectra_0.16-0          magick_2.3               data.table_1.12.8        lmtest_0.9-37            reprex_0.3.0             RANN_2.6.1               mvtnorm_1.1-0            fitdistrplus_1.0-14      xtable_1.8-4             mime_0.9                 hms_0.5.3                patchwork_1.0.0          lsei_1.2-0               evaluate_0.14            readxl_1.3.1             gridExtra_2.3            compiler_3.6.1           KernSmooth_2.23-16       crayon_1.3.4             htmltools_0.4.0          later_1.0.0              RcppParallel_4.4.4      
##  [95] lubridate_1.7.4          DBI_1.1.0                dbplyr_1.4.2             MASS_7.3-51.5            Matrix_1.2-18            cli_2.0.2                gdata_2.18.0             metap_1.3                igraph_1.2.4.2           pkgconfig_2.0.3          sn_1.5-5                 numDeriv_2016.8-1.1      plotly_4.9.2             xml2_1.2.2               vipor_0.4.5              dqrng_0.2.1              multtest_2.42.0          XVector_0.26.0           rvest_0.3.5              bibtex_0.4.2.2           digest_0.6.25            sctransform_0.2.1        RcppAnnoy_0.0.15         tsne_0.1-3               rmarkdown_2.1            cellranger_1.1.0         leiden_0.3.3             uwot_0.1.5               edgeR_3.28.1             DelayedMatrixStats_1.8.0 shiny_1.4.0              gtools_3.8.1             lifecycle_0.1.0          nlme_3.1-145             jsonlite_1.6.1           BiocNeighbors_1.4.2      fansi_0.4.1              limma_3.42.2             pillar_1.4.3             lattice_0.20-40          fastmap_1.0.1            httr_1.4.1               plotrix_3.7-7            survival_3.1-8           glue_1.3.1               FNN_1.1.3                png_0.1-7               
## [142] bit_1.1-15.2             stringi_1.4.6            blob_1.2.1               BiocSingular_1.2.2       caTools_1.18.0           memoise_1.1.0            irlba_2.3.3              future.apply_1.4.0       ape_5.3